package com.tcs.log.map;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Filters garbage records out of the raw log (ETL cleanup step): keeps only
 * lines that mention a known host keyword and prefixes them with an
 * epoch-seconds timestamp.
 * 
 * @Title: etlLogMapper.java
 * @Package com.tcs.log.map
 * @author 神经刀
 * @date 2018年4月7日
 * @version V1.0
 */
public class EtlLogMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

	/** Host keywords that identify log lines worth keeping. */
	private String[] keyArray = { "master", "slave1", "slave2" };

	// SimpleDateFormat is not thread-safe, but each Mapper instance is used by a
	// single task thread, so an instance field is acceptable here.
	private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

	private static final Logger logger = LoggerFactory.getLogger(EtlLogMapper.class);

	// Reusable output objects — standard Hadoop idiom to avoid per-record allocation.
	private final Text outKey = new Text();
	private static final IntWritable ZERO = new IntWritable(0);

	/**
	 * Keeps a line only if it contains one of the host keywords, does not contain
	 * "result" or "URL", and is shorter than 150 characters. The emitted key is
	 * "&lt;raw date&gt;,&lt;epoch seconds&gt;,&lt;rest of line&gt;"; the value is always 0.
	 *
	 * @param key     byte offset of the line in the input split (unused)
	 * @param value   one raw log line
	 * @param context Hadoop task context used to emit the cleaned record
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		for (String keyword : keyArray) {
			if (line.contains(keyword) && !line.contains("result") && !line.contains("URL") && line.length() < 150) {
				logger.info(" line : {} , keyword : {} ", line, keyword);
				// Fresh buffer per matched line; previously a shared buffer plus
				// `continue` could emit doubled output when a line matched two keywords.
				StringBuilder sb = new StringBuilder(line.length());
				try {
					// Raw timestamp is everything before the first comma.
					String dateStr = line.substring(0, line.indexOf(','));
					sb.append(dateStr).append(",");
					try {
						// Second field: the same timestamp converted to epoch seconds.
						sb.append(sdf.parse(dateStr).getTime() / 1000).append(",");
					} catch (ParseException e) {
						// Unparseable timestamp: substitute "now" so the record is not dropped.
						sb.append(System.currentTimeMillis() / 1000).append(",");
						logger.error("failed to parse date: {}", dateStr, e);
					}
					// Rest of the line after the fixed-width 23-char timestamp prefix
					// (assumes "yyyy-MM-dd HH:mm:ss,SSS"-style prefix — shorter lines fall
					// through to the catch below).
					sb.append(line.substring(23));
					logger.debug(" key : {} ", sb);
					outKey.set(sb.toString());
					context.write(outKey, ZERO);
				} catch (Exception e) {
					// Malformed line (e.g. no comma, or shorter than 23 chars): skip it.
					logger.error("failed to etl line: {}", line, e);
				}
				// A line is attributed to at most one host — stop scanning keywords.
				break;
			}
		}
	}
}